******************************************************
**   How accurately are we measuring the LGBT  		**
** 				population in Colombia? 			** 
**			Evidence from a list experiment         **
**												    **
** 		Andrés Ham, Ángela Guarin & Juanita Ruiz    **
** 			                                        **
** 			   						                **
**  Escuela de Gobierno - Alberto Lleras Camargo    **
****************************************************** 

*** THIS DOFILE REPLICATES THE MAIN TABLES AND FIGURES

*** THIS VERSION: 07/18/2023 ***

* Options
clear all
set more off
set scheme s1mono
cap log close

* Paths
* $dir is the replication root (data files and the "3. Programs" folder are
* read from it); $out is the folder where tables and figures are exported.
if c(username)=="ham_andres" {
	cd "/Users/ham_andres/Library/CloudStorage/Dropbox/research/colombia/LGBTIQ+/doc/Labour Economics/R&R/Replication Files"
	global dir "/Users/ham_andres/Library/CloudStorage/Dropbox/research/colombia/LGBTIQ+/doc/Labour Economics/R&R/Replication Files"
	global out "/Users/ham_andres/Library/CloudStorage/Dropbox/research/colombia/LGBTIQ+/doc/Labour Economics/R&R/Replication Files/4. Figures"
	}
if c(username)=="j.ruiz" {
	cd "C:\Users\j.ruiz\Dropbox\Andrés\LGBTIQ+\Replication Files"
	global dir "C:\Users\j.ruiz\Dropbox\Andrés\LGBTIQ+\Replication Files"
	global out "C:\Users\j.ruiz\Dropbox\Andrés\LGBTIQ+\Replication Files\4. Figures"
	}

* Any other user: if the globals were not set above (or beforehand by the
* user), fall back to the current working directory so that the do-file can
* be run from inside the replication folder without editing this section.
if "$dir"=="" global dir "`c(pwd)'"
if "$out"=="" global out "$dir/4. Figures"

	
*-----------------------------------------------------------------------------*
*	Figure 1. Size of LGBT population from household surveys
*-----------------------------------------------------------------------------*
	
* Load data
use "$dir/1.GEIH_LGBT.dta", clear

* Update weights: the expansion factor is divided by 12 for 2021 and by 5 for 2022
* NOTE(review): the 2022 denominator is 5 here and in the Table 4 section but 4 in
* the Table 2 section, although all three load the same file -- confirm which is intended.
gen weight=pondera/12
replace weight=pondera/5 if year==2022

* Generates regions (1 Bogotá / 2 Other urban areas / 3 Rural areas)
gen reg1=.
replace reg1=1 if dpto==11
replace reg1=2 if clase==1 & dpto~=11
replace reg1=3 if clase==2 & dpto~=11

* LGBT population share by region (rows) and sex (columns: sexo==1, sexo==2)
mat population_sex=J(3,2,.)

forvalues r=1/3 {
	forvalues s=1/2 {
		summ lgbt_numerica [w=weight] if reg1==`r' & sexo==`s'
		mat population_sex[`r',`s']=r(mean)
	}
}

* Figure 1
* The matrix is loaded as a 3-row dataset (one row per region) and plotted;
* preserve/restore brings the survey data back afterwards.
preserve
drop _all
svmat double population_sex

gen region=_n
ren population_sex1 lgbt_male
ren population_sex2 lgbt_female
label define regions 1 "Bogotá" 2 "Other urban areas" 3 "Rural areas"
label values region regions

* Shares to percentages
replace lgbt_male=lgbt_male*100
replace lgbt_female=lgbt_female*100

graph bar lgbt_male lgbt_female, over(region, label(labsize(small))) 		///
	asyvars bar(1, color(ebblue)) bar(2, color(lavender)) bargap(10)		///
	blabel(bar, format(%4.2fc) position(inside)) 							///
	ytitle("Percentage") ylabel(0(1)4, format(%4.0fc) nogrid) 				///
	legend(rows(1) region(lstyle(none)) size(small) label(1 "Born male") label(2 "Born female")) 						///
	name(sex, replace)
graph export "$out/Figure1.pdf", replace	
restore

	
*----------------------------------------------------------------------------------*
*	Table 1. Gender identity and sexual orientation questions in household surveys
*----------------------------------------------------------------------------------*		
	
* In Excel file (from DANE guide).
	
	
*-------------------------------------------------------------------------------------------------*
*	Table 2. Conditional differences in labor market outcomes for self-reported LGBT population
*-------------------------------------------------------------------------------------------------*		

* First column (GEIH)
use "$dir/1.GEIH_LGBT.dta", clear

* Update weights
* NOTE(review): the Figure 1 and Table 4 sections divide the 2022 weight by 5, this
* section by 4, although all three load the same file -- confirm which is intended.
gen weight_2021=pondera/12
gen weight_2022=pondera/4 //denominator changes according to the number of months included
gen weight=weight_2021
replace weight=weight_2022 if year==2022
egen time=group(year mes)

* Other variables
gen female=sexo==2
gen edad2=edad^2

* Labor market outcomes
gen log_earnings=ln(inglabo)

* Keeps Bogota (Bogotá: dpto 11)
keep if dpto==11

* Gay
* NOTE(review): the original condition was
*   ident_sex==1 & atraccion==1 | sexo==1 & ident_sex==1 & atraccion==1
* whose second clause is implied by the first, so it reduces to the expression
* below. If "sexo==1 & atraccion==1" was intended as the second clause (as in the
* list-experiment definition of d_2), this needs to be changed.
gen gay=0
replace gay=1 if ident_sex==1 & atraccion==1
* Lesbian (same remark as above, with code 2)
gen lesb=0
replace lesb=1 if ident_sex==2 & atraccion==2
* LG
gen lg=0
replace lg=1 if gay==1 | lesb==1
* Bisexual
gen bisex=0
replace bisex=1 if atraccion==3

* Locals
local outcomes "pea ocupado des_aux informal log_earnings"
local controls "edad edad2 i.estado_civil i.educ i.estrato per_hog"

* Estimation samples: suffix of the stored estimates -> sample restriction.
* "female" is born female (sexo==2) and "male" is its complement (female==0).
local samples   "all female male"
local if_all    ""
local if_female "if female==1"
local if_male   "if female==0"

foreach y of local outcomes {
	foreach s of local samples {

		* Aggregate LGBT indicator
		reghdfe `y' lgbt_numerica `controls' `if_`s'' [w=weight], absorb(time) vce(robust)
		* Weighted mean of the outcome on the SAME sample restriction as the
		* regression; added before storing so that esttab's stats(mean ...) finds it
		summ `y' `if_`s'' [w=weight]
		estadd scalar mean=r(mean)
		estimates store `y'_`s'

		* By type
		reghdfe `y' lg bisex trans_numerica `controls' `if_`s'' [w=weight], absorb(time) vce(robust)
		summ `y' `if_`s'' [w=weight]
		estadd scalar mean=r(mean)
		estimates store `y'_`s'_type
	}
}
	
* Table 2 is written to a single CSV: the first call replaces the file and the
* remaining five panels are appended in order (All, All by type, Born female,
* Born female by type, Born male, Born male by type).

* Pieces shared by the six esttab calls
local target `"using "$out/Table2.csv", csv"'
local fmt    "b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) nonotes nolines compress staraux"
local heads  `"mtitles("LFP" "ER" "UR" "Informality" "Log Earnings")"'
local foot   `"stats(mean r2_a N, label("Mean(y)" "Adjusted R2" "N") fmt(3 3 0))"'

* Names of the stored estimates for each sample, in column order
foreach s in all female male {
	local agg_`s'
	local typ_`s'
	foreach y in pea ocupado des_aux informal log_earnings {
		local agg_`s' `agg_`s'' `y'_`s'
		local typ_`s' `typ_`s'' `y'_`s'_type
	}
}

* All
esttab `agg_all' `target' replace		///
	keep(lgbt_numerica) `fmt'			///
	title("All") `heads'

esttab `typ_all' `target' append		///
	keep(lg bisex trans_numerica) `fmt'	///
	nomtitles nonumbers `foot'

* Born female
esttab `agg_female' `target' append		///
	keep(lgbt_numerica) `fmt'			///
	title("Born female") `heads' `foot'

esttab `typ_female' `target' append		///
	keep(lg bisex trans_numerica) `fmt'	///
	nomtitles nonumbers `foot'

* Born male
esttab `agg_male' `target' append		///
	keep(lgbt_numerica) `fmt'			///
	title("Born male") `heads' `foot'

esttab `typ_male' `target' append		///
	keep(lg bisex trans_numerica) `fmt'	///
	nomtitles nonumbers `foot'
	

*-----------------------------------------------------------------------------*
*	Table 3. Example of list experiment question for gender identity
*-----------------------------------------------------------------------------*		

* In Excel file.	

*-----------------------------------------------------------------------------*
*	Table 4. Representativeness of list experiment sample and balance tests
*-----------------------------------------------------------------------------*		
	
* First column (GEIH)		
use "$dir/1.GEIH_LGBT.dta", clear

* Create balance table: rows 1-22 follow the variable list below, row 23 holds the
* number of observations. Columns: 1 GEIH mean, 2 mean for grupo==0, 3 mean for
* grupo==1, 4 p-value of the test on grupo.
mat balance=J(23,4,.)

* Update weights
gen weight_2021=pondera/12
gen weight_2022=pondera/5 //denominator changes according to the number of months included
gen weight=weight_2021
replace weight=weight_2022 if year==2022
egen time=group(year mes)

* Keeps Bogota (Bogotá: área 11, dpto 11)
keep if dpto==11

* Space variable for nice-looking tables (all zeros; produces spacer rows)
gen space=0

* Pre-processing

* Age ranges. The upper bound is "edad<." so that missing ages are NOT assigned
* to the 51+ group (a missing value compares as larger than any number).
gen rango_edad=.
replace rango_edad=1 if edad>=18 & edad<=29
replace rango_edad=2 if edad>=30 & edad<=50
replace rango_edad=3 if edad>=51 & edad<.
label var rango_edad "Rango de edad"
label define rango 1"18-29" 2"30-50" 3">51"
label values rango_edad rango
tab rango_edad, g(age_)

gen female=sexo==2

* Marital status collapsed into four groups
gen est_civil=.
replace est_civil=1 if estado_civil==1 | estado_civil==2
replace est_civil=2 if estado_civil==3
replace est_civil=3 if estado_civil==6
replace est_civil=4 if estado_civil==4 | estado_civil==5
tab est_civil, g(civil_)

tab educ, g(nedu_)


* Variables (one matrix row each, "space" entries are spacer rows)
local variables "pea ocupado des_aux space female space space age_1 age_2 age_3 space space civil_1 civil_2 civil_3 civil_4 space space nedu_1 nedu_2 nedu_3 nedu_4"

loc i=1
foreach x of local variables {
	
	* Mean GEIH
	summ `x' [aw=weight]
	mat balance[`i',1]=r(mean)
	
	loc ++i
}

* Observations
count 
mat balance[23,1]=r(N)


* Second, third and fourth columns (list experiment sample)
use "$dir/2.List Experiment database.dta", clear
* Education and age dummies are rebuilt below from educ and edad
drop nedu_*

* Gen variable for nice looking tables
gen space=0

gen educ2=.
replace educ2=1 if educ>=1 & educ<=4
replace educ2=2 if educ==5
replace educ2=3 if educ==6
replace educ2=4 if educ==7

drop age_*
gen agegroups=.
replace agegroups=1 if edad>=18 & edad<=29
replace agegroups=2 if edad>=30 & edad<=50
replace agegroups=3 if edad>=51 & edad<.

tab agegroups, g(age_)

tab educ2, g(nedu_)

* Labor market outcomes
gen pea=0
replace pea=1 if actividad==1 | actividad==2

gen ocupado=0
replace ocupado=1 if actividad==1

gen desocupa=0
replace desocupa=1 if actividad==2

* Missing when pea==0 (division by zero)
gen des_aux=desocupa/pea

* Variables 
local variables "pea ocupado des_aux space female space space age_1 age_2 age_3 space space civil_1 civil_2 civil_3 civil_4 space space nedu_1 nedu_2 nedu_3 nedu_4"


loc i=1
foreach x of local variables {	
	
	* Direct report
	summ `x' [aw=factor] if grupo==0 
	mat balance[`i',2]=r(mean)

	* Indirect report
	summ `x' [aw=factor] if grupo==1
	mat balance[`i',3]=r(mean)	

	* Difference: p-value of grupo in a regression that also includes zona dummies
	reg `x' grupo i.zona [w=factor], r
	test grupo
	mat balance[`i',4]=r(p)	

	loc ++i
}

* Observations
count if grupo==0
mat balance[23,2]=r(N)

count if grupo==1
mat balance[23,3]=r(N)

preserve
drop _all
svmat double balance
export excel using "$out/Table4.xlsx", replace
restore

	
*-----------------------------------------------------------------------------*
*	Table 5. The effect of veiled report treatment on LGBT population size
*-----------------------------------------------------------------------------*		

* Load Data	
use "$dir/2.List Experiment database.dta", clear		
	
* Repetitions for bootstrap
global REPS=1000

* Empirical approach:
* Veiled report: y^v_qi
	// Number of 5 statements reported as true
* Direct report: y^d_qi= d_qi + c_qi
	//A. d_qi: =1 if the answer to the direct question is 1
	//B. c_qi: Number of 4 statements reported as true
* Change in reporting coefficient: E[y^v_qi]-E[y^d_qi]
	//Larger coef suggests the existence of a social norm which makes
	//truthful reporting more costly

	
* 1. Build d_qi for each direct question
*Gender identity
gen d_1=0
replace d_1=1 if sexo_nace!=identidad & identidad!=.
*Same sex attraction
gen d_2=0
replace d_2=1 if (orientacion==1 & (sexo_nace==1 | identidad==1 | identidad==3)) ///
	| (orientacion==2 & (sexo_nace==2 | identidad==2 | identidad==4))
*Both sexes
gen d_3=0
replace d_3=1 if orientacion==3


* 2. Build y^d_qi= d_qi + c_qi
forvalues i=1/3 {
	gen ld_`i'= l`i'_dir + d_`i'
}

* Results matrix. List i occupies rows 3*i-2 (estimates) and 3*i-1 (standard
* errors and p-values). Columns: 1 direct-report mean, 2-3 regression
* coefficient and p-value, 4-5 first bootstrap statistic and p-value,
* 6-7 second bootstrap statistic and p-value.
mat table5a=J(8,7,.)

* Sample mean direct response with standard error
forvalues i=1/3 {
	loc r=3*`i'-2
	
	* Confidence intervals for means (Normal distribution)
	ci means d_`i' [w=factor]
	mat table5a[`r',1]=r(mean)
	mat table5a[`r'+1,1]=r(se)
}

* 3. Build y_qi: y^d_qi for grupo==0 and y^v_qi for grupo==1
forvalues i=1/3 {
	gen l`i'=.
	replace l`i'= ld_`i' if grupo==0
	replace l`i'= l`i'_ind if grupo==1
}

* 4. Basic Regressions

*** WITHOUT CONTROLS (matrix table5a) ***
* Run regressions
forvalues i=1/3 {
	loc r=3*`i'-2
	
	* Reg
	reg l`i' grupo [w=factor], r
	* Computes point estimates, se or p-val for linear comb of coef
	lincom grupo 
	mat table5a[`r',2]=r(estimate)
	mat table5a[`r'+1,2]=r(se)
	mat table5a[`r'+1,3]=r(p)
}	
	

* Sum
* The do-file is expected to define the program bs_tf called below
do "$dir/3. Programs/bootstrap_truefrac.do"

bootstrap r(sum_l1) r(sum_l2) r(sum_l3), seed(54321) reps($REPS) nowarn: bs_tf
mat results=r(table)

* Rows 1, 2 and 4 of r(table) are the point estimate, standard error and p-value
forvalues i=1/3 {
	loc r=3*`i'-2
	
	mat table5a[`r',4]=results[1,`i']
	mat table5a[`r'+1,4]=results[2,`i']
	mat table5a[`r'+1,5]=results[4,`i']
}


* Percentage increase
* The do-file is expected to define the program bs_perc called below
do "$dir/3. Programs/bootstrap_percchange.do"

bootstrap r(perc_l1) r(perc_l2) r(perc_l3), seed(54321) reps($REPS) nowarn: bs_perc
mat results=r(table)

forvalues i=1/3 {
	loc r=3*`i'-2
	
	mat table5a[`r',6]=results[1,`i']
	mat table5a[`r'+1,6]=results[2,`i']
	mat table5a[`r'+1,7]=results[4,`i']
}

* Exports with asterisks
preserve
* The full list of matrices to export goes here
loc asteriscos "table5a"

foreach a of local asteriscos {
	
	* Number of decimals used when formatting the values (default=3)
	loc decimales = 3

	* Load the matrix as data so that its cells can be handled as text
	drop _all
	svmat double `a'

	* Columns 2, 4 and 6 hold the values that receive the asterisks
	forvalues d = 2(2)6 {
		
		* The p-value sits in the column immediately to the right
		loc e = `d' + 1

		* Convert to string
		gen aux`d'=string(`a'`d',"%15.`decimales'f") if `a'`d' ~= .

		* Cells whose p-value is below 0.10 are wrapped in parentheses and get
		* one asterisk per threshold passed (0.10, 0.05, 0.01)
		replace aux`d' = "(" + aux`d' + ")" + substr("***", 1, (`a'`e' < 0.10) + (`a'`e' < 0.05) + (`a'`e' < 0.01)) if `a'`e' < 0.10

		replace aux`d' = "" if aux`d' == "."
		* Put the text column where the numeric ones were, then drop the latter
		move aux`d'  `a'`e'
		drop `a'`d' `a'`e'
	}

	* Export to Excel
	export excel using "$out/`a'.xlsx", replace

}
restore

*** WITH CONTROLS - Panel B ***
* Relies on d_1-d_3 and l1-l3 built earlier in the Table 5 section.

* Education recoded into four groups
gen educ2=.
replace educ2=1 if educ>=1 & educ<=4
replace educ2=2 if educ==5
replace educ2=3 if educ==6
replace educ2=4 if educ==7

loc controls "i.sexo_nace i.rango_edad i.est_civil i.educ2 i.zona"
	
* Results matrix. List i occupies rows 3*i-2 (estimates) and 3*i-1 (standard
* errors and p-values). Columns: 1 direct-report mean, 2-3 regression
* coefficient and p-value, 4-5 first bootstrap statistic and p-value,
* 6-7 second bootstrap statistic and p-value.
mat table5b=J(8,7,.)

* Sample mean direct response with standard error
forvalues i=1/3 {
	loc r=3*`i'-2
	
	ci means d_`i' [w=factor]
	mat table5b[`r',1]=r(mean)
	mat table5b[`r'+1,1]=r(se)
}

* 4. Basic Regressions

* Run regressions
forvalues i=1/3 {
	loc r=3*`i'-2
	
	reg l`i' grupo `controls' [w=factor], r
		
	lincom grupo
	mat table5b[`r',2]=r(estimate)
	mat table5b[`r'+1,2]=r(se)
	mat table5b[`r'+1,3]=r(p)
}	
	

* Sum
* The do-file is expected to define the program bs_tf called below
do "$dir/3. Programs/bootstrap_truefrac.do"

bootstrap r(sum_l1) r(sum_l2) r(sum_l3), seed(54321) reps($REPS) nowarn: bs_tf, controls(`controls')
mat results=r(table)

* Rows 1, 2 and 4 of r(table) are the point estimate, standard error and p-value
forvalues i=1/3 {
	loc r=3*`i'-2
	
	mat table5b[`r',4]=results[1,`i']
	mat table5b[`r'+1,4]=results[2,`i']
	mat table5b[`r'+1,5]=results[4,`i']
}


* Percentage increase
* The do-file is expected to define the program bs_perc called below
do "$dir/3. Programs/bootstrap_percchange.do"

bootstrap r(perc_l1) r(perc_l2) r(perc_l3), seed(54321) reps($REPS) nowarn: bs_perc, controls(`controls')
mat results=r(table)

forvalues i=1/3 {
	loc r=3*`i'-2
	
	mat table5b[`r',6]=results[1,`i']
	mat table5b[`r'+1,6]=results[2,`i']
	mat table5b[`r'+1,7]=results[4,`i']
}

* Exports with asterisks

* The full list of matrices to export goes here
loc asteriscos "table5b"

preserve
foreach a of local asteriscos {
	
	* Number of decimals used when formatting the values (default=3)
	loc decimales = 3

	* Load the matrix as data so that its cells can be handled as text
	drop _all
	svmat double `a'

	* Columns 2, 4 and 6 hold the values that receive the asterisks
	forvalues d = 2(2)6 {
		
		* The p-value sits in the column immediately to the right
		loc e = `d' + 1

		gen aux`d'=string(`a'`d',"%15.`decimales'f") if `a'`d' ~= .

		* Cells whose p-value is below 0.10 are wrapped in parentheses and get
		* one asterisk per threshold passed (0.10, 0.05, 0.01)
		replace aux`d' = "(" + aux`d' + ")" + substr("***", 1, (`a'`e' < 0.10) + (`a'`e' < 0.05) + (`a'`e' < 0.01)) if `a'`e' < 0.10

		replace aux`d' = "" if aux`d' == "."
		* Put the text column where the numeric ones were, then drop the latter
		move aux`d'  `a'`e'
		drop `a'`d' `a'`e'
	}

	* Export to Excel
	export excel using "$out/`a'.xlsx", replace

}
restore
	
	
*-----------------------------------------------------------------------------*
*	Table 6. The effect of veiled report treatment on LGBT population size
*							by sex at birth
*-----------------------------------------------------------------------------*	
use "$dir/2.List Experiment database.dta", clear	

* Repetitions for bootstrap
global REPS=1000

*-------------------------------------------------*
* 1. Estimate the change in reporting coefficient 
*-------------------------------------------------*

* 1. Build d_qi for each direct question
*Gender identity
gen d_1=0
replace d_1=1 if sexo_nace!=identidad & identidad!=.
*Same sex attraction
gen d_2=0
replace d_2=1 if (orientacion==1 & (sexo_nace==1 | identidad==1 | identidad==3)) ///
	| (orientacion==2 & (sexo_nace==2 | identidad==2 | identidad==4))
	//Should we just take into account identity and not sex?
	//Sexual orientation is supposed to be associated with gender
*Both sexes
gen d_3=0
replace d_3=1 if orientacion==3


* 2. Build y^d_qi= d_qi + c_qi
forvalues i=1/3 {
	gen ld_`i'= l`i'_dir + d_`i'
}

* Education recoded into four groups
gen educ2=.
replace educ2=1 if educ>=1 & educ<=4
replace educ2=2 if educ==5
replace educ2=3 if educ==6
replace educ2=4 if educ==7

*** BORN MALE ***
* Sample restriction throughout this panel: sexo_nace==1

* Sex at birth is not a control here because the sample is split by it.
* This macro is also used by the BORN FEMALE panel further below.
loc controls "i.rango_edad i.est_civil i.educ2 i.zona"

* Results matrix. List i occupies rows 3*i-2 (estimates) and 3*i-1 (standard
* errors and p-values). Columns: 1 direct-report mean, 2-3 regression
* coefficient and p-value, 4-5 first bootstrap statistic and p-value,
* 6-7 second bootstrap statistic and p-value.
mat table6a=J(8,7,.)

* Sample mean direct response with standard error
forvalues i=1/3 {
	loc r=3*`i'-2
	
	ci means d_`i' [w=factor] if sexo_nace==1
	mat table6a[`r',1]=r(mean)
	mat table6a[`r'+1,1]=r(se)
}

* 3. Build y_qi: y^d_qi for grupo==0 and y^v_qi for grupo==1
forvalues i=1/3 {
	gen l`i'=.
	replace l`i'= ld_`i' if grupo==0
	replace l`i'= l`i'_ind if grupo==1
}

* 4. Basic Regressions

* Run regressions
forvalues i=1/3 {
	loc r=3*`i'-2
	
	reg l`i' grupo `controls' [w=factor] if sexo_nace==1, r
		
	lincom grupo
	mat table6a[`r',2]=r(estimate)
	mat table6a[`r'+1,2]=r(se)
	mat table6a[`r'+1,3]=r(p)
}	
	

* Sum
* The do-file is expected to define the program bs_tf called below
do "$dir/3. Programs/bootstrap_truefrac.do"

bootstrap r(sum_l1) r(sum_l2) r(sum_l3), seed(54321) reps($REPS) nowarn: bs_tf if sexo_nace==1, controls(`controls')
mat results=r(table)

* Rows 1, 2 and 4 of r(table) are the point estimate, standard error and p-value
forvalues i=1/3 {
	loc r=3*`i'-2
	
	mat table6a[`r',4]=results[1,`i']
	mat table6a[`r'+1,4]=results[2,`i']
	mat table6a[`r'+1,5]=results[4,`i']
}


* Percentage increase
* The do-file is expected to define the program bs_perc called below
do "$dir/3. Programs/bootstrap_percchange.do"

bootstrap r(perc_l1) r(perc_l2) r(perc_l3), seed(54321) reps($REPS) nowarn: bs_perc if sexo_nace==1, controls(`controls')
mat results=r(table)

forvalues i=1/3 {
	loc r=3*`i'-2
	
	mat table6a[`r',6]=results[1,`i']
	mat table6a[`r'+1,6]=results[2,`i']
	mat table6a[`r'+1,7]=results[4,`i']
}

* Exports with asterisks
preserve
* The full list of matrices to export goes here
loc asteriscos "table6a"

foreach a of local asteriscos {
	
	* Number of decimals used when formatting the values (default=3)
	loc decimales = 3

	* Load the matrix as data so that its cells can be handled as text
	drop _all
	svmat double `a'

	* Columns 2, 4 and 6 hold the values that receive the asterisks
	forvalues d = 2(2)6 {
		
		* The p-value sits in the column immediately to the right
		loc e = `d' + 1

		* Convert to string
		gen aux`d'=string(`a'`d',"%15.`decimales'f") if `a'`d' ~= .

		* Cells whose p-value is below 0.10 are wrapped in parentheses and get
		* one asterisk per threshold passed (0.10, 0.05, 0.01)
		replace aux`d' = "(" + aux`d' + ")" + substr("***", 1, (`a'`e' < 0.10) + (`a'`e' < 0.05) + (`a'`e' < 0.01)) if `a'`e' < 0.10

		replace aux`d' = "" if aux`d' == "."
		* Put the text column where the numeric ones were, then drop the latter
		move aux`d'  `a'`e'
		drop `a'`d' `a'`e'
	}

	* Export to Excel
	export excel using "$out/`a'.xlsx", replace

}
restore


*** BORN FEMALE ***
* Sample restriction throughout this panel: sexo_nace==2.
* Relies on d_1-d_3, l1-l3 and educ2 built earlier in the Table 6 section.

* Same controls as the BORN MALE panel, restated here so that this panel does
* not depend on a macro defined far above
loc controls "i.rango_edad i.est_civil i.educ2 i.zona"
	
* Results matrix. List i occupies rows 3*i-2 (estimates) and 3*i-1 (standard
* errors and p-values). Columns: 1 direct-report mean, 2-3 regression
* coefficient and p-value, 4-5 first bootstrap statistic and p-value,
* 6-7 second bootstrap statistic and p-value.
mat table6b=J(8,7,.)

* Sample mean direct response with standard error
forvalues i=1/3 {
	loc r=3*`i'-2
	
	ci means d_`i' [w=factor] if sexo_nace==2
	mat table6b[`r',1]=r(mean)
	mat table6b[`r'+1,1]=r(se)
}

* 4. Basic Regressions

* Run regressions
forvalues i=1/3 {
	loc r=3*`i'-2
	
	reg l`i' grupo `controls' [w=factor] if sexo_nace==2, r
		
	lincom grupo
	mat table6b[`r',2]=r(estimate)
	mat table6b[`r'+1,2]=r(se)
	mat table6b[`r'+1,3]=r(p)
}	
	

* Sum
* The do-file is expected to define the program bs_tf called below
do "$dir/3. Programs/bootstrap_truefrac.do"

bootstrap r(sum_l1) r(sum_l2) r(sum_l3), seed(54321) reps($REPS) nowarn: bs_tf if sexo_nace==2, controls(`controls')
mat results=r(table)

* Rows 1, 2 and 4 of r(table) are the point estimate, standard error and p-value
forvalues i=1/3 {
	loc r=3*`i'-2
	
	mat table6b[`r',4]=results[1,`i']
	mat table6b[`r'+1,4]=results[2,`i']
	mat table6b[`r'+1,5]=results[4,`i']
}


* Percentage increase
* The do-file is expected to define the program bs_perc called below
do "$dir/3. Programs/bootstrap_percchange.do"

bootstrap r(perc_l1) r(perc_l2) r(perc_l3), seed(54321) reps($REPS) nowarn: bs_perc if sexo_nace==2, controls(`controls')
mat results=r(table)

forvalues i=1/3 {
	loc r=3*`i'-2
	
	mat table6b[`r',6]=results[1,`i']
	mat table6b[`r'+1,6]=results[2,`i']
	mat table6b[`r'+1,7]=results[4,`i']
}

* Exports with asterisks

* The full list of matrices to export goes here
loc asteriscos "table6b"

* preserve/restore keeps the survey data in memory after the export, as the
* other export blocks of this do-file do (drop _all below would otherwise
* leave only the exported matrix in memory)
preserve
foreach a of local asteriscos {
	
	* Number of decimals used when formatting the values (default=3)
	loc decimales = 3

	* Load the matrix as data so that its cells can be handled as text
	drop _all
	svmat double `a'

	* Columns where the values that receive the asterisks are stored
	loc D "2 4 6"
	
	foreach d of local D {
		
		* The p-value sits in the column immediately to the right
		loc e = `d' + 1

		gen aux`d'=string(`a'`d',"%15.`decimales'f") if `a'`d' ~= .

		* Adds parentheses and one * if significant at 10%
		replace aux`d' = "(" + aux`d' + ")" + "*" if `a'`e' < 0.10	
		
		* Adds another * if significant at 5%
		replace aux`d' = aux`d' + "*" if `a'`e' < 0.05	
		
		* Adds another * if significant at 1%
		replace aux`d' = aux`d' + "*" if `a'`e' < 0.01
		
		replace aux`d' = "" if aux`d' == "." 		
		* Put the text column where the numeric ones were, then drop the latter
		move aux`d'  `a'`e'
		drop `a'`d' `a'`e'
		}

	
	* Export to Excel
	export excel using "$out/`a'.xlsx", replace

	}
restore
	
	
*---------------------------------------------------------------------------*
* Figure 2. Robustness test for list experiment
*---------------------------------------------------------------------------*

* Load Data	
use "$dir/2.List Experiment database.dta", clear

* 1. Direct-question indicator for same sex attraction
gen d_2=0
replace d_2=1 if (orientacion==1 & (sexo_nace==1 | identidad==1 | identidad==3)) ///
	| (orientacion==2 & (sexo_nace==2 | identidad==2 | identidad==4))

* 2. Outcomes for lists 2 and 4: the *_ind count for grupo==1, the *_dir count otherwise
* NOTE(review): d_2 is added to l2 for BOTH groups here, whereas the Table 5-7
* sections add it only for grupo==0 -- confirm this is intended for this figure.
gen l2= cond(grupo==1, l2_ind, l2_dir) + d_2
gen l4= cond(grupo==1, l4_ind, l4_dir)

* 3. Basic regressions
* Education recoded into four groups (missing when educ is outside 1-7)
gen educ2= cond(inrange(educ,1,4), 1, cond(educ==5, 2, cond(educ==6, 3, cond(educ==7, 4, .))))

loc controls "i.sexo_nace i.rango_edad i.est_civil i.educ2 i.zona"

reg l2 grupo `controls' [w=factor], r 
estimates store l2_est

* For list 4 the group indicator is flipped (grupo = 1 where it was 0), so the
* coefficient keeps the name "grupo" and can be plotted next to the one above
ren grupo aux
gen grupo=aux==0

reg l4 grupo `controls' [w=factor], r
estimates store l4_est

* 4. Graph
coefplot ///
	(l2_est, label("List 2: Veiled Report") mcolor(ebblue) ciopts(lcolor(gs8))) ///
	(l4_est, label("List 4: Direct Report") mcolor(cranberry) ciopts(lcolor(gs8)) ) , ///
	keep(grupo) graphregion(fcolor(white)) plotregion(style(outline)) ///
	mlabel format(%9.3f) mlabposition(12) mlabsize(medium) ciopts(recast(rcap)) ///
	ylabel("") xlabel(0(0.05)0.4, labsize(small)) ///
	title("", color(black) ) subtitle("") ///
	xtitle("Estimated Prevalence") ///
	legend(region(lwidth(none)))
graph export "$out/Figure2.pdf", replace	 


*---------------------------------------------------------------------------*
* Figure 3. Distribution of number of true/valid items for each list
*---------------------------------------------------------------------------*
* Load Data	
use "$dir/2.List Experiment database.dta", clear


* 1. Build d_qi for each direct question
*Gender identity
gen d_1=0
replace d_1=1 if sexo_nace!=identidad & identidad!=.
*Same sex attraction
gen d_2=0
replace d_2=1 if (orientacion==1 & (sexo_nace==1 | identidad==1 | identidad==3)) ///
	| (orientacion==2 & (sexo_nace==2 | identidad==2 | identidad==4))
	//Should we just take into account identity and not sex?
	//Sexual orientation is supposed to be associated with gender
*Both sexes
gen d_3=0
replace d_3=1 if orientacion==3
*Compliance (list 4 reuses the same-sex-attraction indicator)
gen d_4=d_2


* 2. Build y^d_qi= d_qi + c_qi
forvalues i=1/3 {
	gen ld_`i'= l`i'_dir + d_`i'
}

* For list 4 the direct indicator is added to the *_ind count instead
gen ld_4= l4_ind + d_4

* 3. Build y_qi: lists 1-3 use ld_i for grupo==0 and li_ind for grupo==1
forvalues i=1/3 {
	gen l`i'= cond(grupo==0, ld_`i', cond(grupo==1, l`i'_ind, .))
}

* List 4 is assigned the other way round: ld_4 for grupo==1, l4_dir for grupo==0
gen l4= cond(grupo==1, ld_4, cond(grupo==0, l4_dir, .))


* 4. Answers Distribution (Distribution of total number of yeses to each question)
** Ceiling Effect: Many subjects in a study have scores on a variable that are at or near the possible upper limit
**Problems: less variation and Bunching --> affects the expected change in the variable or its variation, affects distributional properties (Skew)
forvalues i=1/4 {

	* Positioning: shift the two groups 0.2 to either side of each integer so
	* that their bars sit next to each other instead of overlapping
	replace l`i'= l`i' + cond(grupo==1, 0.2, -0.2) if inlist(grupo, 0, 1)

	twoway (histogram l`i' if grupo==0, percent bcolor(gs12) width(0.15)) ///
		(histogram l`i' if grupo==1, percent bcolor(ebblue) width(0.15)) , ///
		title("List `i'") ytitle("Percentage (%)") ylabel(0(10)50) ///
		xtitle("Number of True/Valid statements", size(medsmall)) ///
		xlabel(0 "0" 1 "1" 2 "2" 3 "3" 4 "4" 5 "5" , noticks) ///
		legend(region(lstyle(none)) size(small) position(6) col(2) order (1 "Direct Report" 2 "Veiled Report"))  ///
		name(L`i', replace)
}

* Combine the four panels with a single legend taken from the first one
grc1leg L1 L2 L3 L4, sch(plotplain) title("") legendfrom(L1) iscale(*0.9)
graph export "$out/Figure3.pdf", replace


*-------------------------------------------------------------------------------------*
* Table 7. Change in sensitive answer reports for LGBT identification, by demographics
*-------------------------------------------------------------------------------------*
* Load Data	
use "$dir/2.List Experiment database.dta", clear

* 1. Build d_qi for each direct question
*Gender identity
gen d_1=0
replace d_1=1 if sexo_nace!=identidad & identidad!=.
*Same sex attraction
gen d_2=0
replace d_2=1 if (orientacion==1 & (sexo_nace==1 | identidad==1 | identidad==3)) ///
	| (orientacion==2 & (sexo_nace==2 | identidad==2 | identidad==4))
	//Should we just take into account identity and not sex?
	//Sexual orientation is supposed to be associated with gender
*Both sexes
gen d_3=0
replace d_3=1 if orientacion==3

* 2. Build y^d_qi= d_qi + c_qi
forvalues i=1/3 {
	gen ld_`i'= l`i'_dir + d_`i'
}


* 3. Build y_qi: y^d_qi for grupo==0 and y^v_qi for grupo==1
forvalues i=1/3 {
	gen l`i'=.
	replace l`i'= ld_`i' if grupo==0
	replace l`i'= l`i'_ind if grupo==1
}

* Demographic groups used for the heterogeneity analysis
gen educ2=.
replace educ2=1 if educ>=1 & educ<=4
replace educ2=2 if educ==5
replace educ2=3 if educ==6
replace educ2=4 if educ==7

gen agegroups=.
replace agegroups=1 if edad>=18 & edad<=29
replace agegroups=2 if edad>=30 & edad<=50
replace agegroups=3 if edad>=51 & edad<.

gen skill=.
replace skill=1 if educ2==1 | educ2==2
replace skill=2 if educ2==3 | educ2==4

gen marital=.
replace marital=1 if est_civil==1 | est_civil==2
replace marital=2 if est_civil==3 | est_civil==4

* Heterogeneity
* Columns 1-3: lists 1-3; column 4: e(N) of the regression (overwritten by each
* list, so it ends up holding the value from list 3).
* Each category takes two rows (coefficient on grupo, then its standard error):
*   age groups      rows 1-6
*   education       rows 9-12
*   marital status  rows 15-18
* Rows 7-8 and 13-14 are left missing.
mat heterogeneous=J(18,4,.)

* One specification per dimension: first matrix row, number of categories, and
* controls (the splitting variable itself is left out of the controls)
local dims "agegroups skill marital"

* AGE GROUPS *
local row_agegroups  1
local ncat_agegroups 3
local ctl_agegroups  "i.sexo_nace i.marital i.skill i.zona"

* EDUCATION *
local row_skill  9
local ncat_skill 2
local ctl_skill  "i.sexo_nace i.marital i.agegroups i.zona"

* MARITAL STATUS *
local row_marital  15
local ncat_marital 2
local ctl_marital  "i.sexo_nace i.skill i.agegroups i.zona"

foreach v of local dims {

	forvalues l=1/3 {

		forvalues g=1/`ncat_`v'' {

			* Row of the coefficient for category g of this dimension
			loc r=`row_`v''+2*(`g'-1)

			reg l`l' grupo `ctl_`v'' [w=factor] if `v'==`g', r
			mat heterogeneous[`r',4]=e(N)

			lincom grupo
			mat heterogeneous[`r',`l']=r(estimate)
			mat heterogeneous[`r'+1,`l']=r(se)

		}

	}

}

preserve
drop _all
svmat double heterogeneous
export excel using "$out/Table7.xlsx", replace
restore


exit
*END
	